import os
import sys
import requests
import json
import re
from lxml import etree

# Make the launch directory importable so the local pub_func module resolves.
cwd = os.getcwd()
print(cwd)
sys.path.append(cwd)

from pub_func import DownloadFile, folder_exist, file_write

# Search endpoint that returns paginated picture metadata as JSON.
base_url = 'http://soso.huitu.com/Search/GetAllPicInfoNew'

# Minimal browser-like header so the request is not rejected as a bot.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}

# Query-string template; 'kw' and 'page' are overwritten per request.
params = {
    'kw': '123',            # search keyword (placeholder, replaced at runtime)
    'page': '1',            # current page number
    's1': '1',              # result type: 1 = photo, 2 = design
    'perPageSize': '100',   # maximum images returned per page
}

if __name__ == '__main__':
    # Chinese cloud-genus names used verbatim as search keywords.
    cloud_type = ['絮状高积云','透光高积云','荚状高积云','积云性高积云','蔽光高积云',
    '堡状高积云','透光高层云','蔽光高层云','伪卷云','密卷云',
    '毛卷云','钩卷云','卷积云','匀卷层云','毛卷层云',
    '雨层云','碎雨云','碎积云','浓积云','淡积云',
    '鬃积雨云','秃积雨云','碎层云','层云','透光层积云',
    '荚状层积云','积云性层积云','蔽光层积云','堡状层积云',]
    base_dir = '/media/ubuntu/08f29263-3882-43db-96fd-febb6f2e2e5f/cloud_type'
    domain_name = 'huitu.com'
    dir_path = os.path.join(base_dir, domain_name)
    # Project helper — presumably creates dir_path if it does not exist.
    folder_exist(dir_path)

    for cloud_index, word in enumerate(cloud_type, start=1):
        count = 0
        params['kw'] = word
        # BUGFIX: reset the page before the discovery request; previously the
        # leftover page number from the prior keyword was sent here, so the
        # reported TotalPage could come from an arbitrary page query.
        params['page'] = '1'

        # First request only to learn how many result pages exist.
        # timeout added so a stalled connection cannot hang the scraper.
        response = requests.get(url=base_url, headers=headers, params=params,
                                timeout=30)
        response.raise_for_status()
        max_page = int(response.json()['TotalPage'])

        # Output file is per-keyword, not per-image: hoisted out of the loops.
        file_name = domain_name.split('.')[0] + '_' + str(cloud_index) + '.csv'
        file_path = os.path.join(dir_path, file_name)

        for page in range(1, max_page + 1):
            params['page'] = str(page)
            response = requests.get(url=base_url, headers=headers, params=params,
                                    timeout=30)
            response.raise_for_status()
            # response.json() decodes using the declared charset; no manual
            # encoding override needed.
            json_info = response.json()

            # 'r' holds the list of picture records for this page.
            for url_info in json_info['r']:
                count += 1
                url_img = url_info['imgUrl']
                img_desc = url_info['pic_name']
                img_name = str(cloud_index) + '_' + str(count) + '.jpg'
                # NOTE(review): img_desc may itself contain commas, which would
                # corrupt this hand-rolled CSV line — consider the csv module.
                content = img_name + ',' + img_desc + ',' + url_img + ',\n'
                print(content, page)
                # Project helper — appends `content` to file_path ('a' mode).
                file_write(file_path, content, 'a')